Loading packages for the plots

library(ggplot2)
library(plotly)
library(flexdashboard)
library(dplyr)
library(tidyverse)
library(leaflet)

Reading in Cleaned Data

# Load the 2007 CSV extracts from ./data; column types are guessed by
# read_csv().
alcohol_data_2007 <- read_csv("./data/PRAM_2007_alcohol.csv")

# NOTE(review): a previous render reported "One or more parsing issues" for
# this file; run problems(tobacco_data_2007) to inspect the affected rows.
tobacco_data_2007 <- read_csv("./data/PRAM_2007_tobacco.csv")

no_contraception_data_2007 <- read_csv("./data/PRAM_2007_no_contraception.csv")

infant_mortality_df <- read_csv("./data/PRAM_2007_infantmortality.csv")

# cleaned alcohol data 
cleaned_alc_2007 <- alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, data_value_type)) |>
  # Discard the three excluded response categories; rows with a missing
  # response are removed by drop_na() just below.
  filter(!response %in% c("DRINKER WHO QUIT", "NONDRINKER", "NO")) |>
  drop_na(response, geolocation) |>
  # Split geolocation on ", " into two pieces, then strip the surrounding
  # parentheses so both pieces can be converted to numbers.
  separate(geolocation, into = c("latitude", "longitude"), sep = ", ", convert = TRUE) |>
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )

# cleaned tobacco data 

# Standardise column names, drop data_value_type, remove the excluded response
# categories and rows missing a response or geolocation, then split
# geolocation into numeric latitude/longitude columns.
# NOTE(review): a previous render warned that one row's geolocation did not
# split into two pieces, so that row ends up with NA coordinates.
cleaned_tobac_2007 <- tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  filter(response != "SMOKER WHO QUIT") |>
  filter(response != "NONSMOKER") |>
  filter(response != "None (0 cig)") |>
  filter(response != "NO") |>
  drop_na(response, geolocation) |>
  separate(geolocation, into = c("latitude", "longitude"), sep = ", ", convert = TRUE) |>
  mutate(
    # Remove the parentheses around each coordinate before converting to numeric
    latitude = as.numeric(str_replace_all(latitude, "\\(|\\)", "")),
    longitude = as.numeric(str_replace_all(longitude, "\\(|\\)", ""))
  )

# Load the remaining 2007 CSV extracts from ./data.
no_alcohol_data_2007 <- read_csv("./data/PRAM_2007_no_alcohol.csv")

no_tobacco_data_2007 <- read_csv("./data/PRAM_2007_no_tobacco.csv")

contraception_data_2007 <- read_csv("./data/PRAM_2007_contraception.csv")

# cleaned no alcohol data 

# Standardise column names, drop three columns that are not carried forward,
# and keep only rows that have a response.
cleaned_no_alc_2007 <- no_alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!is.na(response))

# Open the cleaned table in the data viewer for a manual check.
view(cleaned_no_alc_2007)

# cleaned no tobacco data

# Same cleaning steps as the no-alcohol table: standardise names, drop three
# columns, and keep only rows that have a response.
cleaned_no_tobacco_2007 <- no_tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!is.na(response))

# cleaned infant mortality

# Drop the listed data_value_* columns, remove rows lacking a response or
# geolocation, and split geolocation into numeric latitude/longitude.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    -c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  drop_na(response, geolocation) |>
  separate(geolocation, into = c("latitude", "longitude"), sep = ", ", convert = TRUE) |>
  mutate(
    across(
      c(latitude, longitude),
      # Strip the surrounding parentheses, then convert to numeric
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )

# cleaned contraception

# Standardise names, drop three columns, and exclude the "YES (CHECKED)" and
# "YES" responses; rows with a missing response are removed by drop_na().
cleaned_contraception_2007 <- contraception_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!response %in% c("YES (CHECKED)", "YES")) |>
  drop_na(response)

# cleaned no contraception

# Standardise names, drop data_value_type, keep rows that have a response, and
# split geolocation into numeric latitude/longitude. Rows with a missing
# geolocation are not removed here.
cleaned_no_contra_2007 <- no_contraception_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  filter(!is.na(response)) |>
  separate(geolocation, into = c("latitude", "longitude"), sep = ", ", convert = TRUE) |>
  mutate(
    across(
      c(latitude, longitude),
      # Strip the surrounding parentheses, then convert to numeric
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )

Plot 1: Alcohol Consumption in relation to Infant Mortality

# NOTE(review): this reassigns cleaned_infant_mortality, replacing the earlier
# version that carries latitude/longitude columns; here geolocation is left
# unsplit and rows with a missing geolocation are kept. Confirm this is
# intended before any later code relies on the coordinates.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    -c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  filter(!is.na(response))

# Plot of question and responses for alcohol

# Dodged bar chart: one group of bars per question, one bar per response.
# geom_bar() counts rows, so the y axis is a count (it was labelled "Response").
cleaned_alc_2007 |>
  ggplot(aes(x = question, fill = response)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Questions vs Response of Alcohol Consumption",
    x = "Question",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated axis text is not reset
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# creating "yes" variable 


# plot showing infant mortality rate vs alcohol consumption
# Overlays the question/response pairs of both datasets on shared axes:
# blue points come from the alcohol data, red points from infant mortality.
ggplot() +
  geom_point(
    data = cleaned_alc_2007,
    aes(x = question, y = response),
    color = "blue",
    size = 3
  ) +
  geom_point(
    data = cleaned_infant_mortality,
    aes(x = question, y = response),
    color = "red",
    size = 3
  ) +
  labs(
    title = "Alcohol Consumption and Infant Mortality Responses by Question",
    x = "Question",
    y = "Response"
  ) +
  theme_minimal()

Plot 2: Tobacco Consumption in relation to Infant Mortality

Plot 3: No Consumption in relation to Infant Mortality

Map of Maternal Alcohol Use

# One orange circle marker per row of the cleaned alcohol data, positioned by
# its longitude/latitude columns.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_alc_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr,  # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response)  # click popup: the row's response
  )

Map of Maternal Tobacco use

# One orange circle marker per row of the cleaned tobacco data, positioned by
# its longitude/latitude columns.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    # A previous render warned that one row has missing or invalid lat/lon and
    # is ignored by leaflet; dropping such rows up front gives the same map
    # without the warning.
    data = drop_na(cleaned_tobac_2007, latitude, longitude),
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr,  # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response)  # click popup: the row's response
  )

Map of Infant Mortality Rate